R Markdown

https://www.multpl.com/sitemap (URL for pulling any S&P 500 Data)

Price to Earnings Anlaysis

library(rvest)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
# Read in data
# PE Ratio
PE_URL <- read_html('https://www.multpl.com/s-p-500-pe-ratio/table/by-month')

allTables <- PE_URL %>% html_table(fill = TRUE, header = TRUE)

P_E_Ratio <- allTables[[1]]

colnames(P_E_Ratio)[2] <- 'P/E Ratio'

# Cleaning Text and Date in columns
P_E_Ratio$`P/E Ratio` <- gsub(pattern = '\n\nestimate', replacement = '', x = P_E_Ratio$`P/E Ratio`)
class(P_E_Ratio$Date)
## [1] "character"
P_E_Ratio$Date <- as.Date(P_E_Ratio$Date, format = '%B %d, %Y')
P_E_Ratio <-  P_E_Ratio[nrow(P_E_Ratio):1,]
P_E_Ratio$`P/E Ratio` <-  as.numeric(P_E_Ratio$`P/E Ratio`)
str(P_E_Ratio)
## tibble [1,814 × 2] (S3: tbl_df/tbl/data.frame)
##  $ Date     : Date[1:1814], format: "1871-01-01" "1871-02-01" ...
##  $ P/E Ratio: num [1:1814] 11.1 11.2 11.5 11.8 12.2 ...
head(P_E_Ratio)
## # A tibble: 6 × 2
##   Date       `P/E Ratio`
##   <date>           <dbl>
## 1 1871-01-01        11.1
## 2 1871-02-01        11.2
## 3 1871-03-01        11.5
## 4 1871-04-01        11.8
## 5 1871-05-01        12.2
## 6 1871-06-01        12.0
summary(P_E_Ratio)
##       Date              P/E Ratio     
##  Min.   :1871-01-01   Min.   :  5.31  
##  1st Qu.:1908-10-08   1st Qu.: 11.49  
##  Median :1946-07-16   Median : 14.88  
##  Mean   :1946-07-17   Mean   : 15.97  
##  3rd Qu.:1984-04-23   3rd Qu.: 18.23  
##  Max.   :2022-02-11   Max.   :123.73
P_E.ts <- ts(P_E_Ratio$`P/E Ratio`, start = c(1871, 1), end = c(2022, 2), freq = 12)
P_E.window.ts <- window(P_E.ts, start = c(2000, 1), end = c(2022, 2))

# Acf Plot
Acf(P_E.window.ts, lag.max = 12, main = '', na.action = na.pass)

# PE Plot
plot(P_E.window.ts, ylim = c(0, 150),  ylab = "PE Ratio", xlab = "Time", bty = "l", 
     xaxt = "n", xlim = c(2000,2029), main = "")
axis(1, at = seq(2000, 2027.5, 1), labels = format(seq(2000, 2027.5, 1)))
lines(c(2022.17 - 1, 2022.17 - 1), c(0, 130)) 
lines(c(2022.17, 2022.17), c(0, 130))
text(2009, 140, "Training")
text(2021.5, 140, "Validation")
text(2026, 140, "Future")
arrows(2000, 130, 2022.17 - 2, 130, code = 3, length = 0.1, lwd = 1,angle = 30)
arrows(2022.17 - 1, 130, 2022.17, 130, code = 3, length = 0.1, lwd = 1,angle = 30)
arrows(2023.17, 130, 2028, 130, code = 3, length = 0.1, lwd = 1, angle = 30)

# Test for random walk
fit <- Arima(P_E.window.ts, order=c(1,0,0))
fit
## Series: P_E.window.ts 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     mean
##       0.9628  26.1142
## s.e.  0.0152   6.2424
## 
## sigma^2 estimated as 17.25:  log likelihood=-756.51
## AIC=1519.03   AICc=1519.12   BIC=1529.78
fit2 <- Arima(diff(P_E_Ratio$`P/E Ratio`, 1), order=c(1,0,0))
fit2
## Series: diff(P_E_Ratio$`P/E Ratio`, 1) 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1    mean
##       0.5631  0.0076
## s.e.  0.0194  0.0762
## 
## sigma^2 estimated as 2.017:  log likelihood=-3207.72
## AIC=6421.45   AICc=6421.46   BIC=6437.95
fit3 <- auto.arima(P_E.window.ts)
fit3
## Series: P_E.window.ts 
## ARIMA(5,0,2)(0,0,1)[12] with non-zero mean 
## 
## Coefficients:
##           ar1     ar2     ar3      ar4      ar5     ma1     ma2    sma1
##       -0.0200  0.9532  0.6600  -0.4976  -0.3494  1.4863  0.7518  0.2069
## s.e.   0.1308  0.1054  0.0972   0.0635   0.0744  0.1284  0.1146  0.0771
##          mean
##       25.9375
## s.e.   2.7020
## 
## sigma^2 estimated as 8.696:  log likelihood=-662.95
## AIC=1345.89   AICc=1346.76   BIC=1381.73
Acf(diff(P_E_Ratio$`P/E Ratio`), lag.max = 12, main = '')

# Moving average
ma.trailing <- rollmean(P_E.window.ts, k = 12, align = 'right')
ma.centered <- ma(P_E.window.ts, order = 12)

plot(P_E.window.ts, ylim = c(0, 130),  ylab = 'PE Ratio', xlab = 'Time', bty = 'l', xaxt = 'n', xlim = c(2000,2022.5), main = '')
axis(1, at = seq(2000, 2022.5, 1), labels = format(seq(2000, 2022.5, 1)))
lines(ma.centered, lwd = 2)
lines(ma.trailing, lwd = 2, lty = 2)
legend(2013,120, c('PE Ratio','Centered Moving Average', 'Trailing Moving Average'), lty=c(1,1,2), lwd=c(1,2,2), bty = 'n')  

# Data partition
nValid <- 12
nTrain <- length(P_E.window.ts) - nValid
train.ts <- window(P_E.window.ts, start = c(2000, 1), end = c(2000, nTrain))
valid.ts <- window(P_E.window.ts, start = c(2000, nTrain + 1), end = c(2000, nTrain + nValid))

# Naive model
naive.pred <- naive(train.ts, h = nValid)

# Seasonal naive model
snaive.pred <- snaive(train.ts, h = nValid)

# Trailing moving average model
ma.trailing <- rollmean(train.ts, k = 12, align = 'right')
last.ma <- tail(ma.trailing, 1)
ma.trailing.pred <- ts(rep(last.ma, nValid), start = c(2000, nTrain + 1), end = c(2000, nTrain + nValid), freq = 12)

# Polynomial trend + seasonality model
train.lm.trend.season <- tslm(train.ts ~ trend + I(trend^2) + season)
train.lm.trend.season.pred<-forecast(train.lm.trend.season, h = nValid, level = 0)

# Apply ARIMA on Polynomial trend + seasonality model's residuals
train.res.arima <- Arima(train.lm.trend.season$residuals, order = c(8,0,0))
train.res.arima.pred <- forecast(train.res.arima, h = nValid)

# Holt-Winter model
hwin <- ets(train.ts)
hwin.pred <- forecast(hwin, h = nValid, level = 0)

# Auto ARIMA model
train.arima <- auto.arima(train.ts)
summary(train.arima)
## Series: train.ts 
## ARIMA(5,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1      ar2     ar3      ar4     ar5     mean
##       1.4708  -0.4747  0.3458  -0.6963  0.2942  25.9195
## s.e.  0.0596   0.1011  0.1031   0.1006  0.0595   3.0566
## 
## sigma^2 estimated as 9.078:  log likelihood=-639.79
## AIC=1293.58   AICc=1294.04   BIC=1318.34
## 
## Training set error measures:
##                        ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.005109035 2.977176 1.348578 -0.794982 4.727781 0.1322103
##                      ACF1
## Training set -0.003004687
train.arima.pred <-forecast(train.arima, h = nValid, level = 0)

# TBATS model
train.tbats <- tbats(train.ts) 
train.tbats.pred <- forecast(train.tbats, h = nValid, level = 0) 
summary(train.tbats)
##                   Length Class  Mode     
## lambda               1   -none- numeric  
## alpha                1   -none- numeric  
## beta                 1   -none- numeric  
## damping.parameter    1   -none- numeric  
## gamma.values         0   -none- NULL     
## ar.coefficients      1   -none- numeric  
## ma.coefficients      1   -none- numeric  
## likelihood           1   -none- numeric  
## optim.return.code    1   -none- numeric  
## variance             1   -none- numeric  
## AIC                  1   -none- numeric  
## parameters           2   -none- list     
## seed.states          4   -none- numeric  
## fitted.values      254   ts     numeric  
## errors             254   ts     numeric  
## x                 1016   -none- numeric  
## seasonal.periods     0   -none- NULL     
## y                  254   ts     numeric  
## call                 2   -none- call     
## series               1   -none- character
## method               1   -none- character
# Plot model forecasts
plot(P_E.window.ts, xlab = 'Time', ylab = 'PE Ratio of S&P 500', main = 'Model Forecasts', xlim = c(2018.2, 2022.2), ylim = c(10,45))
lines(valid.ts)
lines(naive.pred$mean, col = 'orange')
lines(snaive.pred$mean, col = 'yellow')
lines(ma.trailing.pred, col = 'pink')
lines(train.lm.trend.season.pred$mean, col = 'blue')
lines(train.lm.trend.season.pred$mean+train.res.arima.pred$mean, col = 'goldenrod3')
lines(hwin.pred$mean, col = 'green')
lines(train.arima.pred$mean, col = 'purple') 
lines(train.tbats.pred$mean, col = 'red')
legend('topleft',   
       inset = 0.05,
       legend = c('Naive', 'Seasonal Naive', 'Trailing moving average', 'Polynomial trend + Seasonality', 'Polynomial trend + Seasonality + ARIMA', 'Holt-Winter', 'Auto ARIMA', 'TBATS'),
       col = c('orange', 'yellow', 'pink', 'blue', 'goldenrod3', 'green', 'purple', 'red'),
       cex = 0.7,
       lwd = 1)

# Model accuracy
accuracy(naive.pred$mean, valid.ts)
##               ME     RMSE    MAE       MPE     MAPE      ACF1 Theil's U
## Test set -6.1925 6.401037 6.1925 -23.31453 23.31453 0.5498945  6.339083
accuracy(snaive.pred$mean, valid.ts)
##               ME     RMSE      MAE       MPE     MAPE      ACF1 Theil's U
## Test set -5.3625 8.124154 7.509167 -21.01205 28.11372 0.7089354  7.890078
accuracy(ma.trailing.pred, valid.ts)
##               ME    RMSE    MAE       MPE     MAPE      ACF1 Theil's U
## Test set -5.3625 5.60202 5.3625 -20.23537 20.23537 0.5498945  5.571346
accuracy(train.lm.trend.season.pred$mean, valid.ts)
##                ME     RMSE      MAE      MPE     MAPE     ACF1 Theil's U
## Test set 1.991849 2.480185 1.991849 7.162022 7.162022 0.342672  1.974617
accuracy(train.lm.trend.season.pred$mean + train.res.arima.pred$mean, valid.ts)
##                ME    RMSE      MAE      MPE     MAPE      ACF1 Theil's U
## Test set 2.832528 3.72647 3.209136 10.79542 12.12842 0.5328737  3.788331
accuracy(hwin.pred$mean, valid.ts)
##                 ME     RMSE      MAE       MPE     MAPE      ACF1 Theil's U
## Test set -4.806277 4.966847 4.806277 -18.09073 18.09073 0.4916211  4.905126
accuracy(train.arima.pred$mean, valid.ts)
##                ME     RMSE      MAE      MPE     MAPE      ACF1 Theil's U
## Test set 2.480806 2.825181 2.641931 9.352676 9.880953 0.2414805  2.822854
accuracy(train.tbats.pred$mean, valid.ts)
##                 ME     RMSE      MAE      MPE     MAPE      ACF1 Theil's U
## Test set 0.5196247 1.584093 1.193899 2.014651 4.433675 0.7440661  1.557351
# Forecast using our best model
P_E.tbats <- tbats(P_E.window.ts)
P_E.tbats.pred <- forecast(P_E.tbats, h = 12)
plot(P_E.tbats.pred, include = 36)
lines(P_E.tbats.pred$mean, col = 'blue')

P_E.tbats.pred
##          Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
## Mar 2022       24.27405 22.31664 26.40315 21.345171 27.60481
## Apr 2022       23.79090 20.46717 27.65438 18.900006 29.94744
## May 2022       23.41131 18.94581 28.92933 16.937821 32.35892
## Jun 2022       23.11201 17.63927 30.28272 15.288165 34.93977
## Jul 2022       22.87532 16.50199 31.71014 13.882114 37.69458
## Aug 2022       22.68772 15.50415 33.19966 12.674209 40.61260
## Sep 2022       22.53875 14.62293 34.73963 11.629701 43.68083
## Oct 2022       22.42027 13.83996 36.32008 10.720848 46.88701
## Nov 2022       22.32594 13.14025 37.93290  9.925208 50.22037
## Dec 2022       22.25076 12.51142 39.57156  9.224529 53.67172
## Jan 2023       22.19080 11.94326 41.23092  8.603919 57.23340
## Feb 2023       22.14295 11.42729 42.90694  8.051189 60.89909
# Check residuals
checkresiduals(naive.pred$residuals) 
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(snaive.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.lm.trend.season.pred$residuals) 
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.res.arima.pred$residuals) 
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(hwin.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.arima.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.tbats.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(P_E.tbats.pred$residuals) 
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

Price to Sales Anlaysis

#Price to Sales
PS_URL <- read_html('https://www.multpl.com/s-p-500-price-to-sales/table/by-quarter')

allTables <- PS_URL %>% html_table(fill = TRUE, header = TRUE)

PS_Share <- allTables[[1]]

colnames(PS_Share)[2] <- 'Price to Sales'

# Cleaning Text and Date in columns
PS_Share$`Price to Sales` <- gsub(pattern = '\n\nestimate', replacement = '', x = PS_Share$`Price to Sales`)
class(PS_Share$Date)
## [1] "character"
PS_Share$Date <- as.Date(PS_Share$Date, format = '%B %d,%Y')
PS_Share <-  PS_Share[nrow(PS_Share):1,]
PS_Share$`Price to Sales` <-  as.numeric(PS_Share$`Price to Sales`)
str(PS_Share)
## tibble [86 × 2] (S3: tbl_df/tbl/data.frame)
##  $ Date          : Date[1:86], format: "2000-12-31" "2001-03-31" ...
##  $ Price to Sales: num [1:86] 1.77 1.54 1.64 1.41 1.56 1.6 1.39 1.17 1.3 1.25 ...
head(PS_Share)
## # A tibble: 6 × 2
##   Date       `Price to Sales`
##   <date>                <dbl>
## 1 2000-12-31             1.77
## 2 2001-03-31             1.54
## 3 2001-06-30             1.64
## 4 2001-09-30             1.41
## 5 2001-12-31             1.56
## 6 2002-03-31             1.6
summary(PS_Share)
##       Date            Price to Sales 
##  Min.   :2000-12-31   Min.   :0.800  
##  1st Qu.:2006-04-22   1st Qu.:1.343  
##  Median :2011-08-15   Median :1.530  
##  Mean   :2011-08-15   Mean   :1.663  
##  3rd Qu.:2016-12-08   3rd Qu.:1.867  
##  Max.   :2022-02-11   Max.   :3.150
PS.ts <- ts(PS_Share$`Price to Sales`, start = c(2001, 1), end = c(2021, 4), freq = 4)
head(PS.ts)
##      Qtr1 Qtr2 Qtr3 Qtr4
## 2001 1.77 1.54 1.64 1.41
## 2002 1.56 1.60
# Acf Plot
Acf(PS.ts, lag.max = 4, main = '', na.action = na.pass)

fit <- Arima(diff(PS_Share$`Price to Sales`, 1), order=c(1,0,0))
fit
## Series: diff(PS_Share$`Price to Sales`, 1) 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##           ar1    mean
##       -0.1912  0.0145
## s.e.   0.1102  0.0122
## 
## sigma^2 estimated as 0.01841:  log likelihood=50.17
## AIC=-94.33   AICc=-94.04   BIC=-87.01
fit2 <- auto.arima(PS.ts)
fit2
## Series: PS.ts 
## ARIMA(0,1,0) 
## 
## sigma^2 estimated as 0.01753:  log likelihood=50.04
## AIC=-98.07   AICc=-98.02   BIC=-95.65
Acf(diff(PS_Share$`Price to Sales`), lag.max = 4, main = '')

# Moving average
ma.trailing <- rollmean(PS.ts, k = 4, align = 'right')
ma.centered <- ma(PS.ts, order = 4)

plot(PS.ts, ylim = c(0, 4),  ylab = 'PS Ratio', xlab = 'Time', bty = 'l', xaxt = 'n', xlim = c(2001,2022.4), main = '')
axis(1, at = seq(2001, 2022.4, 1), labels = format(seq(2001, 2022.4, 1)))
lines(ma.centered, lwd = 2)
lines(ma.trailing, lwd = 2, lty = 2)
legend(2013,120, c('PE Ratio','Centered Moving Average', 'Trailing Moving Average'), lty=c(1,1,2), lwd=c(1,2,2), bty = 'n')  

nValid <- 4
nTrain <- length(PS.ts) - nValid
train.ts <- window(PS.ts, start = c(2001, 1), end = c(2001, nTrain))
valid.ts <- window(PS.ts, start = c(2001, nTrain + 1), end = c(2001, nTrain + nValid))

# Naive model
naive.pred <- naive(train.ts, h = nValid)

# Seasonal naive model
snaive.pred <- snaive(train.ts, h = nValid)

# Trailing moving average model
ma.trailing <- rollmean(train.ts, k = 4, align = 'right')
last.ma <- tail(ma.trailing, 1)
ma.trailing.pred <- ts(rep(last.ma, nValid), start = c(2001, nTrain + 1), end = c(2001, nTrain + nValid), freq = 4)

# Polynomial trend + seasonality model
train.lm.trend.season <- tslm(train.ts ~ trend + I(trend^2) + season)
train.lm.trend.season.pred<-forecast(train.lm.trend.season, h = nValid, level = 0)

# Apply ARIMA on Polynomial trend + seasonality model's residuals
train.res.arima <- Arima(train.lm.trend.season$residuals, order = c(1,0,0))
train.res.arima.pred <- forecast(train.res.arima, h = nValid)

# Holt-Winter model
hwin <- ets(train.ts)
hwin.pred <- forecast(hwin, h = nValid, level = 0)

# Auto ARIMA model
train.arima <- auto.arima(train.ts)
summary(train.arima)
## Series: train.ts 
## ARIMA(0,1,0) 
## 
## sigma^2 estimated as 0.01705:  log likelihood=48.73
## AIC=-95.46   AICc=-95.41   BIC=-93.09
## 
## Training set error measures:
##                       ME      RMSE        MAE        MPE     MAPE      MASE
## Training set 0.008772125 0.1297595 0.09377212 0.04772998 6.364785 0.5728844
##                    ACF1
## Training set -0.1915076
train.arima.pred <-forecast(train.arima, h = nValid, level = 0)

# TBATS model
train.tbats <- tbats(train.ts) 
train.tbats.pred <- forecast(train.tbats, h = nValid, level = 0) 
summary(train.tbats)
##                   Length Class  Mode     
## lambda             0     -none- NULL     
## alpha              1     -none- numeric  
## beta               0     -none- NULL     
## damping.parameter  0     -none- NULL     
## gamma.values       0     -none- NULL     
## ar.coefficients    0     -none- NULL     
## ma.coefficients    0     -none- NULL     
## likelihood         1     -none- numeric  
## optim.return.code  1     -none- numeric  
## variance           1     -none- numeric  
## AIC                1     -none- numeric  
## parameters         2     -none- list     
## seed.states        1     -none- numeric  
## fitted.values     80     ts     numeric  
## errors            80     ts     numeric  
## x                 80     -none- numeric  
## seasonal.periods   0     -none- NULL     
## y                 80     ts     numeric  
## call               2     -none- call     
## series             1     -none- character
## method             1     -none- character
# Plot model forecasts
plot(PS.ts, xlab = 'Time', ylab = 'PS Ratio of S&P 500', main = 'Model Forecasts', xlim = c(2018.2, 2022.2), ylim = c(1, 3.5), bty = "l")
lines(naive.pred$mean, col = 'orange')
lines(snaive.pred$mean, col = 'yellow')
lines(ma.trailing.pred, col = 'pink')
lines(train.lm.trend.season.pred$mean, col = 'Blue')
lines(train.lm.trend.season.pred$mean+train.res.arima.pred$mean, col = 'goldenrod3')
lines(hwin.pred$mean, col = 'green')
lines(train.arima.pred$mean, col = 'purple') 
lines(train.tbats.pred$mean, col = 'red')
legend('topleft',   
       inset = 0.05,
       legend = c('Naive', 'Seasonal Naive', 'Trailing moving average', 'Polynomial trend + Seasonality', 'Polynomial trend + Seasonality + ARIMA', 'Holt-Winter', 'Auto ARIMA', 'TBATS'),
       col = c('orange', 'yellow', 'pink', 'blue', 'goldenrod3', 'green', 'purple', 'red'),
       cex = 0.7,
       lwd = 1)

# Model accuracy
accuracy(naive.pred$mean, valid.ts)
##            ME      RMSE  MAE      MPE     MAPE         ACF1 Theil's U
## Test set 0.38 0.3852921 0.38 13.29008 13.29008 6.167906e-16  4.579196
accuracy(snaive.pred$mean, valid.ts)
##            ME      RMSE  MAE      MPE     MAPE       ACF1 Theil's U
## Test set 0.64 0.6851277 0.64 22.41084 22.41084 -0.2341137  8.374095
accuracy(ma.trailing.pred, valid.ts)
##            ME      RMSE  MAE      MPE     MAPE         ACF1 Theil's U
## Test set 0.64 0.6431563 0.64 22.41744 22.41744 6.167906e-16  7.458677
accuracy(train.lm.trend.season.pred$mean, valid.ts)
##                 ME      RMSE       MAE      MPE     MAPE       ACF1 Theil's U
## Test set 0.3267123 0.3307319 0.3267123 11.44093 11.44093 -0.2822204  3.841602
accuracy(train.lm.trend.season.pred$mean + train.res.arima.pred$mean, valid.ts)
##                 ME      RMSE       MAE      MPE     MAPE       ACF1 Theil's U
## Test set 0.2919218 0.2967909 0.2919218 10.21278 10.21278 -0.2481899  3.512237
accuracy(hwin.pred$mean, valid.ts)
##                 ME      RMSE       MAE      MPE     MAPE         ACF1 Theil's U
## Test set 0.4418436 0.4464031 0.4418436 15.46111 15.46111 6.167906e-16  5.263174
accuracy(train.arima.pred$mean, valid.ts)
##            ME      RMSE  MAE      MPE     MAPE         ACF1 Theil's U
## Test set 0.38 0.3852921 0.38 13.29008 13.29008 6.167906e-16  4.579196
accuracy(train.tbats.pred$mean, valid.ts)
##                 ME      RMSE       MAE      MPE     MAPE         ACF1 Theil's U
## Test set 0.4531414 0.4575884 0.4531414 15.85772 15.85772 6.167906e-16  5.388213
# Forecast using our best model
P_S.lm.trend.season <- tslm(PS.ts ~ trend + I(trend^2) + season)
P_S.lm.trend.season.pred <- forecast(P_S.lm.trend.season, h = nValid, level = 0)
P_S.Arima <- Arima(P_S.lm.trend.season.pred$residuals, order = c(1,0,0))
P_S.Arima.pred <- forecast(P_S.Arima, h = 4)

plot(P_S.lm.trend.season.pred, include = 12)
lines(valid.ts)

P_S.lm.trend.season.pred$mean + P_S.Arima.pred$mean
##          Qtr1     Qtr2     Qtr3     Qtr4
## 2022 2.899738 2.898630 2.950132 2.972535
# Check residuals
checkresiduals(naive.pred$residuals) 
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(snaive.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.lm.trend.season.pred$residuals) 
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.res.arima.pred$residuals) 
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(hwin.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.arima.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.tbats.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

Price to Book Anlaysis

#Book Value per Share
P_B_URL <- read_html('https://www.multpl.com/s-p-500-book-value/table/by-quarter')

allTables <- P_B_URL %>% html_table(fill = TRUE, header = TRUE)

P_B_Ratio <- allTables[[1]]

colnames(P_B_Ratio)[2] <- 'Book Value per Share'

# Cleaning Text and Date in columns
P_B_Ratio$`Book Value per Share` <- gsub(pattern = '\n\nestimate', replacement = '', x = P_B_Ratio$`Book Value per Share`)
class(P_B_Ratio$Date)
## [1] "character"
P_B_Ratio$Date <- as.Date(P_B_Ratio$Date, format = '%B %d,%Y')
P_B_Ratio <-  P_B_Ratio[nrow(P_B_Ratio):1,]
P_B_Ratio$`Book Value per Share` <-  as.numeric(P_B_Ratio$`Book Value per Share`)
str(P_B_Ratio)
## tibble [88 × 2] (S3: tbl_df/tbl/data.frame)
##  $ Date                : Date[1:88], format: "1999-12-31" "2000-03-31" ...
##  $ Book Value per Share: num [1:88] 291 296 313 320 326 ...
head(P_B_Ratio)
## # A tibble: 6 × 2
##   Date       `Book Value per Share`
##   <date>                      <dbl>
## 1 1999-12-31                   291.
## 2 2000-03-31                   296.
## 3 2000-06-30                   313.
## 4 2000-09-30                   320.
## 5 2000-12-31                   326.
## 6 2001-03-31                   347.
summary(P_B_Ratio)
##       Date            Book Value per Share
##  Min.   :1999-12-31   Min.   :290.7       
##  1st Qu.:2005-06-07   1st Qu.:434.4       
##  Median :2010-11-15   Median :573.5       
##  Mean   :2010-11-14   Mean   :597.8       
##  3rd Qu.:2016-04-22   3rd Qu.:755.9       
##  Max.   :2021-09-30   Max.   :983.0
P_B.ts <- ts(P_B_Ratio$`Book Value per Share`, start = c(2000, 1), end = c(2021, 3), freq = 4)
P_B.window.ts <- window(P_B.ts, start = c(2000, 1), end = c(2021, 3))

library(forecast)
# Acf Plot
Acf(P_B.window.ts, lag.max = 4, main = '', na.action = na.pass)

library(zoo)
# Test for random walk
fit2 <- Arima(diff(P_B_Ratio$`Book Value per Share`, 1), order=c(1,0,0))
fit2
## Series: diff(P_B_Ratio$`Book Value per Share`, 1) 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1    mean
##       0.2123  7.9663
## s.e.  0.1042  1.6720
## 
## sigma^2 estimated as 155.4:  log likelihood=-341.96
## AIC=689.92   AICc=690.21   BIC=697.32
fit3 <- auto.arima(P_B.window.ts)
fit3
## Series: P_B.window.ts 
## ARIMA(0,1,1) with drift 
## 
## Coefficients:
##          ma1   drift
##       0.2819  7.9517
## s.e.  0.1222  1.6969
## 
## sigma^2 estimated as 155.1:  log likelihood=-337.94
## AIC=681.88   AICc=682.18   BIC=689.25
Acf(diff(P_B_Ratio$`Book Value per Share`), lag.max = 4, main = '')

# Moving average
ma.trailing <- rollmean(P_B.window.ts, k = 4, align = 'right')
ma.centered <- ma(P_B.window.ts, order = 4)

plot(P_B.window.ts, ylim = c(0, 1000),  ylab = 'PB Ratio', xlab = 'Time', bty = 'l', xaxt = 'n', xlim = c(2000,2022.3), main = '')
axis(1, at = seq(2000, 2022.3, 1), labels = format(seq(2000, 2022.3, 1)))
lines(ma.centered, lwd = 2)
lines(ma.trailing, lwd = 2, lty = 2)
legend(2013,250, c('PB Ratio','Centered Moving Average', 'Trailing Moving Average'), lty=c(1,1,2), lwd=c(1,2,2), bty = 'n')  

# Data partition
nValid <- 4
nTrain <- length(P_B.window.ts) - nValid
train.ts <- window(P_B.window.ts, start = c(2000, 1), end = c(2000, nTrain))
valid.ts <- window(P_B.window.ts, start = c(2000, nTrain + 1), end = c(2000, nTrain + nValid))

# Naive model
naive.pred <- naive(train.ts, h = nValid)

# Seasonal naive model
snaive.pred <- snaive(train.ts, h = nValid)

# Trailing moving average model
ma.trailing <- rollmean(train.ts, k = 4, align = 'right')
last.ma <- tail(ma.trailing, 1)
ma.trailing.pred <- ts(rep(last.ma, nValid), start = c(2000, nTrain + 1), end = c(2000, nTrain + nValid), freq = 4)

# Polynomial trend + seasonality model
train.lm.trend.season <- tslm(train.ts ~ trend + I(trend^2) + season)
train.lm.trend.season.pred<-forecast(train.lm.trend.season, h = nValid, level = 0)

# Apply ARIMA on Polynomial trend + seasonality model's residuals
train.res.arima <- Arima(train.lm.trend.season$residuals, order = c(2,0,0))
train.res.arima.pred <- forecast(train.res.arima, h = nValid)

# Holt-Winter model
hwin <- ets(train.ts)
hwin.pred <- forecast(hwin, h = nValid, level = 0)

# Auto ARIMA model
train.arima <- auto.arima(train.ts)
summary(train.arima)
## Series: train.ts 
## ARIMA(0,1,1) with drift 
## 
## Coefficients:
##          ma1   drift
##       0.2556  7.4973
## s.e.  0.1254  1.7061
## 
## sigma^2 estimated as 155.9:  log likelihood=-322.39
## AIC=650.79   AICc=651.1   BIC=658.01
## 
## Training set error measures:
##                      ME     RMSE      MAE         MPE     MAPE     MASE
## Training set 0.01513591 12.25872 8.200039 -0.07159864 1.558848 0.219262
##                     ACF1
## Training set -0.02016012
train.arima.pred <-forecast(train.arima, h = nValid, level = 0)

# TBATS model
train.tbats <- tbats(train.ts) 
train.tbats.pred <- forecast(train.tbats, h = nValid, level = 0) 
summary(train.tbats)
##                   Length Class  Mode     
## lambda              0    -none- NULL     
## alpha               1    -none- numeric  
## beta                1    -none- numeric  
## damping.parameter   1    -none- numeric  
## gamma.values        0    -none- NULL     
## ar.coefficients     0    -none- NULL     
## ma.coefficients     0    -none- NULL     
## likelihood          1    -none- numeric  
## optim.return.code   1    -none- numeric  
## variance            1    -none- numeric  
## AIC                 1    -none- numeric  
## parameters          2    -none- list     
## seed.states         2    -none- numeric  
## fitted.values      83    ts     numeric  
## errors             83    ts     numeric  
## x                 166    -none- numeric  
## seasonal.periods    0    -none- NULL     
## y                  83    ts     numeric  
## call                2    -none- call     
## series              1    -none- character
## method              1    -none- character
# Plot model forecasts
plot(P_B.window.ts, xlab = 'Time', ylab = 'PB Ratio of S&P 500', main = 'Model Forecasts', xlim = c(2018.2, 2021.5), ylim = c(800, 1000))
lines(valid.ts)
lines(naive.pred$mean, col = 'orange')
lines(snaive.pred$mean, col = 'yellow')
lines(ma.trailing.pred, col = 'pink')
lines(train.lm.trend.season.pred$mean, col = 'blue')
lines(train.lm.trend.season.pred$mean+train.res.arima.pred$mean, col = 'goldenrod3')
lines(hwin.pred$mean, col = 'green')
lines(train.arima.pred$mean, col = 'purple') 
lines(train.tbats.pred$mean, col = 'red')
legend('topleft',   
       inset = 0.05,
       legend = c('Naive', 'Seasonal Naive', 'Trailing moving average', 'Polynomial trend + Seasonality', 'Polynomial trend + Seasonality + ARIMA', 'Holt-Winter', 'Auto ARIMA', 'TBATS'),
       col = c('orange', 'yellow', 'pink', 'blue', 'goldenrod3', 'green', 'purple', 'red'),
       cex = 0.7,
       lwd = 1)

# Model accuracy
accuracy(naive.pred$mean, valid.ts) # MAPE 3.98
##             ME     RMSE   MAE      MPE     MAPE      ACF1 Theil's U
## Test set 37.06 41.58819 37.06 3.908406 3.908406 0.1442764  2.401469
accuracy(snaive.pred$mean, valid.ts)# MAPE 3.980919
##               ME     RMSE     MAE      MPE     MAPE      ACF1 Theil's U
## Test set 37.7825 44.34693 37.7825 3.980919 3.980919 0.2706733  2.575777
accuracy(ma.trailing.pred, valid.ts)# MAPE 3.985378
##               ME    RMSE     MAE      MPE     MAPE      ACF1 Theil's U
## Test set 37.7825 42.2333 37.7825 3.985378 3.985378 0.1442764  2.435947
accuracy(train.lm.trend.season.pred$mean, valid.ts) # MAPE 0.971184
##                 ME     RMSE      MAE        MPE     MAPE         ACF1 Theil's U
## Test set -3.071286 9.415854 9.138326 -0.3455418 0.971184 -0.005754907 0.4652116
accuracy(train.lm.trend.season.pred$mean + train.res.arima.pred$mean, valid.ts) # MAPE 0.5372006
##                ME     RMSE      MAE       MPE      MAPE       ACF1 Theil's U
## Test set 5.161234 8.485842 5.161234 0.5372006 0.5372006 -0.1607624  0.496166
accuracy(hwin.pred$mean, valid.ts) # MAPE 2.033524
##                ME     RMSE      MAE      MPE     MAPE       ACF1 Theil's U
## Test set 19.31247 22.57404 19.31247 2.033524 2.033524 0.02753156  1.292138
accuracy(train.arima.pred$mean, valid.ts) # MAPE 1.430108
##                ME     RMSE      MAE      MPE     MAPE       ACF1 Theil's U
## Test set 13.64012 17.72267 13.64012 1.430108 1.430108 0.01580931  1.030082
accuracy(train.tbats.pred$mean, valid.ts) # MAPE 1.132376
##                ME     RMSE      MAE      MPE     MAPE         ACF1 Theil's U
## Test set 10.82925 15.15895 10.82925 1.132376 1.132376 -0.009468737 0.8841638
# Forecast using our best model
P_B.lm.trend.season <- tslm(P_B.window.ts ~ trend + I(trend^2) + season)
P_B.lm.trend.season.pred <- forecast(P_B.lm.trend.season, h = nValid, level = 0)
P_B.Arima <- Arima(P_B.lm.trend.season.pred$residuals, order = c(2,0,0))
P_B.Arima.pred <- forecast(P_B.Arima, h = 5)

plot(P_B.lm.trend.season.pred, include = 12)
lines(valid.ts)

P_B.lm.trend.season.pred$mean + P_B.Arima.pred$mean
##           Qtr1      Qtr2      Qtr3      Qtr4
## 2021                                984.2310
## 2022  987.2361  994.3101 1005.2300
# Check residuals
checkresiduals(naive.pred$residuals) 
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(snaive.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.lm.trend.season.pred$residuals) 
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.res.arima.pred$residuals) 
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(hwin.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.arima.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

checkresiduals(train.tbats.pred$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.